---
title: "The Effects of Wage Information on Support for Redistributive Policies"
subtitle: "Supplemental Appendix"
author: Emily Thorson and Kris-Stella Trump
date: \today
header-includes:
   - \usepackage{setspace}\doublespacing
   - \setlength\parindent{24pt}
   - \usepackage{pdflscape}
   - \newcommand{\blandscape}{\begin{landscape}}
   - \newcommand{\elandscape}{\end{landscape}}
output: 
  pdf_document:
  number_sections: false
  fig_caption: yes
  toc: true
---

```{=tex}
% set appendix to count tables and figures with an A prefix
\setcounter{page}{1}
\renewcommand{\thefigure}{A\arabic{figure}} \setcounter{figure}{0}
\renewcommand{\thetable}{A\arabic{table}} \setcounter{table}{0}
```


```{r study 1 setup, include=FALSE}
#Note: this replication file prepared using R version 4.3.3 "Angel Food Cake". All packages updated on 04.30.2024.

#use here package to manage file paths
#i_am() declares where this document sits relative to the project root, so the here() calls
#below resolve data files from that root rather than from the current working directory
here::i_am("Supplemental Appendix.Rmd")

#load some packages, set some options
#echo = TRUE applies to subsequent chunks until the study 2 setup chunk sets echo again
knitr::opts_chunk$set(echo = TRUE)
library(rmarkdown)
library(haven)
library(tidyverse)
library(stargazer)
library(ggplot2)
library(grid)
library(gridExtra)
library(psych)
library(here)
#scipen=999 discourages scientific notation; digits=1 sets the digits used when numbers are printed
options(scipen=999, digits=1)

#Read in data
#read_dta() imports the Stata file; labelled columns are converted to plain numerics in the recode step
study1 <- read_dta(here("study1_public.dta"))

#Recode variables as needed for analysis

#Bottom- and top-code a salary estimate so that it lies within [lower, upper].
#  x:     numeric vector of salary estimates (NA stays NA)
#  lower: value substituted for estimates below it
#  upper: value substituted for estimates above it
#Returns a numeric vector of the same length as x.
clamp_salary <- function(x, lower = 10000, upper = 80000) {
  x <- ifelse(x < lower, lower, x)
  ifelse(x > upper, upper, x)
}

study1 <- study1 %>% 
  mutate(cond = as.numeric(as_factor(cond)),#recoding labelled variables as appropriate
         Q3 = as.numeric(as_factor(Q3)), #income 
         Q30 = as.numeric(as_factor(Q30)),
         Q5 = as.numeric(as_factor(Q5)), #education
         Q1 = as.numeric(as_factor(Q1)), #gender
         Q6 = as.numeric(as_factor(Q6)), #partisanship
         #bottom- and top-code low and high salary estimates
         #one shared helper for all five occupations, so every column is capped on its own values
         #(the aide top-code must test aide, not fastfood)
         across(c(cashier, fastfood, childcare, aide, retail), clamp_salary),
         cashier_bias = cashier - bls_cashier, #create variables for bias in estimates
         fastfood_bias = fastfood - bls_fastfood,
         childcare_bias = childcare - bls_childcare,
         aide_bias = aide - bls_aide,
         retail_bias = retail - bls_retail,
         average_bias = (cashier_bias + fastfood_bias + childcare_bias + aide_bias + retail_bias)/5,
         average_estimate = (cashier + fastfood + childcare + aide + retail)/5,
         hh_income = ifelse(Q3==10, NA, Q3), #income variables for regression, setting "don't know" to NA
         ind_income = ifelse(Q30==10, NA, Q30),
         education = Q5, #just for ease of regression writing
         college = Q5>5,
         female = Q1==2, #dummies for regression
         republican = Q6==2, 
         democrat = Q6==1,
         hc_any = hc_cashier==1 | hc_fastfood==1 | hc_childcare==1 | hc_aide==1 | hc_retail==1, #know someone 
         hc_sum = hc_cashier + hc_fastfood + hc_childcare + hc_aide + hc_retail, #know how many
         cond_info_net = cond==1, #treatment conditions
         cond_net = cond==2,
         cond_info = cond==3,
         cond_ctrl = cond==4,
         info_yes = ifelse(cond_info==1 | cond_info_net==1, 1, 0), #for interaction model
         net_yes = ifelse(cond_net==1 | cond_info_net==1,1,0),
         foodstamps = (as.numeric(Q14_1)-1)/4, #redistribution, rescaled 0-1 and reverse coded where needed
         welfare = (as.numeric(Q14_3)-1)/4,
         childsupp = (as.numeric(Q14_7)-1)/4,
         medicaid = (3-as.numeric(Q16))/2,
         ubi = (Q31_1)/10,
         minwage = (Q31_4)/10,
         tuition = (Q31_5)/10,
         redistr_scale = (foodstamps+welfare+childsupp+medicaid+ubi+minwage+tuition)/7 #using seven variables for scale
         )

#record the recruited sample size before any exclusions are applied
full_n1 <- nrow(study1)

#Exclusion: keep only respondents slower than the 10th percentile of completion time
#(i.e. drop the bottom decile of completion times)
study1 <- filter(study1, surveytime > quantile(surveytime, 0.1))

#Convenience subset: low-income respondents, using a ~national median split (hh income <50k)
study1_lowinc <- filter(study1, hh_income < 6)

#Convenience subsets: respondents without / with any contact in a low-income occupation
study1_nocontacts <- filter(study1, hc_any == 0)
study1_contacts <- filter(study1, hc_any > 0)
```

```{r study 2 setup, include=FALSE}
#this chunk sets options, imports data, and prepares data for analysis
#echo = F hides chunk source in the rendered appendix from here on
#NOTE(review): `digits` does not look like a standard knitr chunk option; printed digits are
#governed by options(digits=) on the next line - confirm whether this argument has any effect
knitr::opts_chunk$set(echo = F,
                      digits=2)
options(digits=1, scipen=999) #sets digits in printed numbers; scipen reduces scientific notation
#load packages
#several of these were already attached in the study 1 setup chunk; attaching again is harmless
library(rmarkdown)
library(psych)
library(tidyverse)
library(ggplot2)
library(qualtRics)
library(RColorBrewer)
library(stargazer)
library(kableExtra)
library(dotwhisker)
library(gridExtra)

#load data, prep data for analysis
#Reads the public Study 2 file, converts the -99 "displayed but not answered" code, and derives
#the partisanship, contact, policy-support, treatment, demographic, and salary-estimate variables
#used in the rest of the appendix.
study2 <- read_csv(here("study2_public.csv")) %>%
  #for network questions, -99 signifies option was displayed but not selected -> set to 0
  mutate(across(starts_with("network_"), ~recode(., '-99' = 0))) %>%
  #set remaining instances (in other survey items) of displayed but unanswered to NA
  mutate(across(where(is.character), ~ na_if(.,"-99")),
         across(where(is.numeric), ~ na_if(.,-99))
         ) %>%
  #merge salary estimates from different conditions
  mutate(
    #attention check pass
    pass_attention = ifelse((attentioncheck_1==1 & attentioncheck_2==1), 1, 0),
    #partisanship
    dem_lean = replace_na(dem_lean, 0),
    rep_lean = replace_na(rep_lean, 0),
    indep_lean = replace_na(indep_lean,0),
    #7-point scale built from the three follow-up items; higher = more Republican
    partisan_republican = (1*(dem_lean==1) + 2*(dem_lean==2) + 3*(indep_lean==1) + 4*(indep_lean==3) + 5*(indep_lean==2) + 6*(rep_lean==2) + 7*(rep_lean==1)),
    democrat = partisan_republican <4,
    republican = partisan_republican>4,
    independent = partisan_republican==4,
    #contact dummies
    retail_contact = ifelse(network_1_retail_2==1 | network_1_retail_3==1, 1, 0),
    fastfood_contact = ifelse(network_1_fastfood_2==1 | network_1_fastfood_3==1, 1, 0),
    health_contact = ifelse(network_1_health_2==1 | network_1_health_3==1, 1, 0),
    waiter_contact = ifelse(network_2_waiter_2==1 | network_2_waiter_3==1, 1, 0),
    childcare_contact = ifelse(network_2_childcare_2==1 | network_2_childcare_3==1, 1, 0),
    any_contact = ifelse(retail_contact==1 | fastfood_contact==1 | health_contact==1 | waiter_contact==1 | childcare_contact==1, 1, 0),
    retail_acq = ifelse(network_1_retail_3==1, 1, 0),
    fastfood_acq = ifelse(network_1_fastfood_3==1, 1, 0),
    health_acq = ifelse(network_1_health_3==1, 1, 0),
    waiter_acq = ifelse(network_2_waiter_3==1, 1, 0),
    childcare_acq = ifelse(network_2_childcare_3==1, 1, 0),
    n_acq = retail_acq + fastfood_acq + health_acq + waiter_acq + childcare_acq,
    retail_friend = ifelse(network_1_retail_2==1, 1, 0),
    fastfood_friend = ifelse(network_1_fastfood_2==1, 1, 0),
    health_friend = ifelse(network_1_health_2==1, 1, 0),
    waiter_friend = ifelse(network_2_waiter_2==1, 1, 0),
    childcare_friend = ifelse(network_2_childcare_2==1, 1, 0),
    n_friend = retail_friend + fastfood_friend + health_friend + waiter_friend + childcare_friend,
    n_contact = n_acq + n_friend,
    any_friend = ifelse(n_friend>0, 1,0),
    #recode policy question on min wage so higher numbers indicate the more liberal position
    #NOTE(review): min_wage_support is divided by 5 here but is then summed with items that look
    #unscaled and divided by 40 in policy_support below - confirm the intended scaling
    min_wage_support = (6-support_2)/5,
    #create a 0 to 1 variable that measures overall support for redistribution
    policy_support = (spend_1 + spend_3 + spend_5 + spend_6 + support_1 + min_wage_support + support_3 + support_4)/40,
    #create a 0 to 1 variable that measures support for low-income-worker oriented policies
    lowinc_policy_support = (support_4 + spend_5)/10,
    #create variables for different treatments
    pure_control = ifelse(condition==5 | condition==6, 1, 0),
    info_treatment = ifelse(condition==5 | condition==6, 0, 1),
    contrast_treatment= ifelse(condition==1 | condition==3, 1, 0),
    nocontrast_treatment = ifelse(condition==2 | condition==4, 1, 0),
    estimates_treatment = ifelse(condition==1 | condition==2, 1, 0),
    noestimates_treatment = ifelse(condition==3 | condition==4, 1, 0),
    #in lucid, people who did not want to indicate hhi or education are coded as -3105; set these to NA
    hhi = na_if(hhi, -3105),
    #preserve original hhi
    hhi_categories = hhi,
    #recode income to thousands USD, taking midpoint of each category
    hhi = 7.5*(hhi==1) + 17.5*(hhi==2) + 22.5*(hhi==3) + 27.5*(hhi==4) + 32.5*(hhi==5) + 37.5*(hhi==6) + 42.5*(hhi==7) + 47.5*(hhi==8) + 52.5*(hhi==9) + 57.5*(hhi==10) + 62.5*(hhi==11) + 67.5*(hhi==12) + 72.5*(hhi==13) + 77.5*(hhi==14) + 82.5*(hhi==15) + 87.5*(hhi==16) + 92.5*(hhi==17) + 97.5*(hhi==18) + 112.5*(hhi==19) + 137.5*(hhi==20) + 162.5*(hhi==21) + 187.5*(hhi==22) + 225*(hhi==23) + 275*(hhi==24),
    education = na_if(education, -3105),
    #recode lucid's partisanship indicator to democrat, republican
    lucid_dem = political_party<4 |  political_party==6,
    lucid_indep = political_party == 4 | political_party==7,
    lucid_rep = political_party>7 | political_party==5,
    #dummies for demographics
    white = ethnicity==1,
    black = ethnicity==2,
    hispanic_dummy = hispanic != 1,
    college = education > 4,
    male = gender==1,
    #the next two for easier plotting
    contrast_plot = ifelse(contrast_treatment==1, 1, ifelse(nocontrast_treatment==1, 2,3)), #1 = hi contr, 2 = lo contr, 3 = ctrl
    est_plot = ifelse(estimates_treatment==1, 1, ifelse(noestimates_treatment==1, 2,3)), #1 = est, 2 = no est, 3 = ctrl
    #salary estimates
    #for occupation-specific analysis, combine the hi and low condition salary estimates for each occupation into one variable each
    #this step not needed for high income estimates as these are only asked in condition 1
    retail_est = ifelse(condition==1, retail_est_hi, retail_est_lo),
    fastfood_est = ifelse(condition==1, fastfood_est_hi, fastfood_est_lo),
    health_est = ifelse(condition==1, health_est_hi, health_est_lo),
    waiter_est = ifelse(condition==1, waiter_est_hi, waiter_est_lo),
    childcare_est = ifelse(condition==1, childcare_est_hi, childcare_est_lo),
    #count how many occupation guesses each respondent made (depends on trtmnt condition and whether they skipped any responses)
    #each !is.na() is parenthesized because `!` binds more loosely than `+` in R; without the
    #parentheses the expression is one nested negation (a logical), not a count
    n_lowinc_estimates = (!is.na(retail_est)) + (!is.na(fastfood_est)) + (!is.na(health_est)) + (!is.na(waiter_est)) + (!is.na(childcare_est)),
    n_highinc_estimates = (!is.na(dentist_est)) + (!is.na(software_est)) + (!is.na(lawyer_est)),
    #winsorize low-income estimates at 100,000
    retail_est = ifelse(retail_est>=100000, 100000, retail_est),
    fastfood_est = ifelse(fastfood_est>=100000, 100000, fastfood_est),
    health_est = ifelse(health_est>=100000, 100000, health_est),
    childcare_est = ifelse(childcare_est>=100000, 100000, childcare_est),
    waiter_est = ifelse(waiter_est>=100000, 100000, waiter_est),
    #did people over/underestimate low wage occupations' incomes
    retail_overest_dummy = as.numeric(retail_est > 29360),
    fastfood_overest_dummy = as.numeric(fastfood_est > 23250),
    health_overest_dummy = as.numeric(health_est > 24200),
    waiter_overest_dummy = as.numeric(waiter_est > 26800),
    childcare_overest_dummy = as.numeric(childcare_est > 25460),
    #was there any over-estimate? note that number guessed varied by condition so different # chances to get it wrong
    #NOTE(review): with `|`, a respondent with no over-estimate but at least one missing dummy gets NA
    #rather than FALSE - confirm this is intended where fewer than five occupations were guessed
    lowinc_overest_dummy = retail_overest_dummy==1 | fastfood_overest_dummy==1 | health_overest_dummy==1 | waiter_overest_dummy==1 | childcare_overest_dummy==1,
    #what was the amount of over/underestimation
    retail_overest_amt = retail_est - 29360,
    fastfood_overest_amt = fastfood_est - 23250,
    health_overest_amt = health_est - 24200,
    waiter_overest_amt = waiter_est - 26800,
    childcare_overest_amt = childcare_est - 25460
    ) %>%
  rename(duration_sec = "Duration in seconds")


#record pre-exclusion counts for reporting
full_n <- nrow(study2)
no_consent <- sum(study2$consent != 4)

#first exclusion: keep only respondents who consented
study2 <- filter(study2, consent == 4)

#number of consenting respondents who did not pass the attention check
fail_attention_check <- nrow(study2) - sum(study2$pass_attention == 1, na.rm = TRUE)

#pre-registered exclusion 1: keep only respondents who passed the attention check
study2 <- filter(study2, pass_attention == 1)

#pre-registered exclusion 2: within each condition, drop completions faster than that
#condition's 2nd percentile of duration
study2 <- ungroup(
  filter(
    group_by(study2, condition),
    duration_sec >= quantile(duration_sec, probs = 0.02)
  )
)

#conditions 1 and 2 on their own, for easier reporting
study2_cond1_hi <- filter(study2, condition == 1)
study2_cond2_lo <- filter(study2, condition == 2)

#respondents who have at least one contact
study2_contacts <- filter(study2, any_contact == 1)

#plain data-frame copies for easy in-text reporting of means etc
study2_df <- as.data.frame(study2)
study2_contacts_df <- as.data.frame(study2_contacts)
```

\tableofcontents
\newpage

# Appendix A1: Description of Pretest (Study 1)


```{r demographics study 1, include=F, eval=T}
options(digits = 1)
#Study 1 sample statistics next to national benchmarks, one row per characteristic
#Demographic estimates retrieved from ACS 5-year estimates for 2018, accessed at https://www.census.gov/acs/www/data/data-tables-and-tools/data-profiles/ on 2020.05.26
#Partisanship estimate from the 2016 ANES, accessed at https://electionstudies.org/resources/anes-guide/top-tables/?id=21 on 2020.05.26
#median household income manually inferred from categories in hhi variable (taking mid-point of median category)
#median age inferred from variable Q2, taking mid-point of median category
demographics_table_1 <- tribble(
  ~Variable,                  ~`Study 1`,                                    ~`United States Population`,
  "Percent Male",             mean(study1$Q1 == 1, na.rm = TRUE) * 100,         49.2,
  "Median Age*",              48,                                            37.9,
  "Percent college educated", mean(study1$college == 1, na.rm = TRUE) * 100,    31.5,
  "Median household income*", 62500,                                         60293,
  "Percent Republican",       mean(study1$republican == 1, na.rm = TRUE) * 100, 39
)
```

Study 1 was not pre-registered. The data and replication materials (including survey flow, CONSORT chart, and codebook with full question wording) are available on Dataverse ([link https://doi.org/10.7910/DVN/UCUPMF](https://doi.org/10.7910/DVN/UCUPMF)).

*Participant recruitment, sample size, and exclusions*

This study was run in 2014 through the survey company Survey Sampling International. 1059 participants were recruited, and the fastest decile of completions was excluded from analysis, leaving 952 subjects across four experimental conditions. 

*Relationship of hypothesis to experimental design*

The hypothesis was that information about low-income occupations' salaries would increase support for redistributive policy, and that this may especially be the case for people who are reminded that they personally know people in these occupations. The design thus randomly varies whether people are asked about contacts in low-income occupations and whether they are given information about low-income occupations' salaries, before being asked about their support for redistributive policies.

*Human subjects research*

This research was approved by the IRB at George Washington University. The research was classified as "minimal harm", meaning that participation was considered to have no more risk of harm than those present in everyday life. No deception was used. Participants were asked for their consent before starting the survey. Participants were compensated by the survey company SSI according to their prevailing market rates. The researchers do not have access to information regarding exact compensation levels. Potentially identifying information (such as participant identifiers) has been removed from the public version of the dataset.

*Sample characteristics*

The median completion time for the survey was `r median(study1$surveytime)/60` minutes. Although the sample was economically diverse, it was less well-off than the population more generally: just `r sum(study1$hh_income>5, na.rm=TRUE)/nrow(study1)*100`% of the sample reported a household income of more than \$50,000 (compared to 50% of the U.S. population and 45% of the GCS sample). The majority of the sample (`r sum(study1$female==1, na.rm=TRUE)/nrow(study1)*100`%) was female. In total, `r sum(study1$republican==1, na.rm=TRUE)/nrow(study1)*100`% identified as Republican,  `r sum(study1$democrat==1, na.rm=TRUE)/nrow(study1)*100`% as Democrat, and  `r sum(study1$Q6==3, na.rm=TRUE)/nrow(study1)*100`% as Independent. Table A1 summarizes the demographic characteristics of the sample. 

```{r demographics table study 1, results = 'asis', echo=F}
#render the Study 1 demographics table: base kable, then footnote, then striped styling
study1_demo_kable <- knitr::kable(
  demographics_table_1,
  digits = 0,
  caption = "Study 1 Demographics"
)
study1_demo_kable <- add_footnote(
  study1_demo_kable,
  "United States estimates derived from ACS 5 year estimates for 2018 and the 2016 ANES. Age and household income are medians (estimated from categorical variables) for the sample but means for the national population."
)
kable_styling(study1_demo_kable, latex_options = "striped")
```

# Appendix A2: Results of Pretest (Study 1)

```{r redistribution measure study 1, echo=F}
options(digits = 2)
#collect the seven redistribution items and compute the scale's reliability
re_scale <- select(
  study1,
  foodstamps, welfare, childsupp, medicaid, ubi, minwage, tuition
)
redist_alpha_full <- psych::alpha(re_scale)
#standardized alpha from the overall summary, reported in the text
redist_alpha <- redist_alpha_full[["total"]][["std.alpha"]]
```

In Study 1, the dependent variable is a linear composite of seven policies with redistributive implications. The policies are: increasing spending on food stamps, increasing spending on childcare for low-income families, increasing spending on welfare, support for Medicaid expansion, support for a guaranteed income from the US government, support for reducing the minimum wage (reverse coded), and support for free college tuition for children from low-income families. These seven items form a reliable scale (Cronbach's Alpha `r redist_alpha`).

Table A2 presents the effects of the experimental treatments in the pretest on this scale of support for redistribution. Receiving information about the true incomes of low-wage workers increases support for redistribution; this effect appears stronger among people who have at least one contact in a low-wage occupation and who were asked about (effectively, reminded of) this connection before answering the policy items. The results were used to design Study 2, in which we more fully explore whether information about the incomes of low-wage workers affects support for redistribution.

```{r main results study 1 as regression table, echo=F, results="asis"}
options(digits = 3)
#same specification fitted on all respondents, on those with no contacts, and on those with
#at least one contact in a low-wage occupation
supp_redist <- lm(
  redistr_scale ~ republican + democrat + cond_info + cond_net + cond_info_net,
  data = study1
)
supp_redist_nc <- lm(
  redistr_scale ~ republican + democrat + cond_info + cond_net + cond_info_net,
  data = study1_nocontacts
)
supp_redist_c <- lm(
  redistr_scale ~ republican + democrat + cond_info + cond_net + cond_info_net,
  data = study1_contacts
)
stargazer(
  supp_redist, supp_redist_nc, supp_redist_c,
  covariate.labels = c(
    "Republican", "Democrat", "Condition: info", "Condition: network",
    "Condition: info and network", "Intercept"
  ),
  dep.var.labels = "Support for redistribution scale",
  column.labels = c("All resp.", "No contacts", "At least 1 contact"),
  title = "Pretest (Study 1): Treatment Effect on Support for Redistribution",
  digits = 2,
  header = FALSE
)
#restore the single-digit default used for in-text numbers
options(digits = 1)
```


# Appendix A3: Description of Main Study (Study 2)

```{r demographics study 2, include=F, eval=T}
options(digits=1)
#Study 2 sample statistics next to national benchmarks, one row per characteristic
#Demographic estimates retrieved from ACS 5-year estimates for 2018, accessed at https://www.census.gov/acs/www/data/data-tables-and-tools/data-profiles/ on 2020.05.26
#Partisanship estimate from the 2016 ANES, accessed at https://electionstudies.org/resources/anes-guide/top-tables/?id=21 on 2020.05.26
#median household income manually inferred from categories in hhi variable (taking mid-point of median category)
#na.rm=TRUE on every mean so that a single missing response cannot turn a table cell into NA
#NOTE(review): college is education>5 in this table but education>4 in the `college` dummy and in
#the in-text share of college graduates - confirm which cutoff is intended
demographics_table_2 <- tribble(
  ~Variable, ~`Study 2`, ~`United States Population`,
  "Percent Male", mean(study2$gender==1, na.rm=TRUE)*100, 49.2,
  "Mean Age", mean(study2$age_1, na.rm=TRUE), 37.9,
  "Percent White", mean(study2$ethnicity==1, na.rm=TRUE)*100, 72.7,
  "Percent Black", mean(study2$ethnicity==2, na.rm=TRUE)*100, 12.7,
  "Percent Hispanic", mean(study2$hispanic!=1, na.rm=TRUE)*100, 17.8,
  "Percent college educated", mean(study2$education>5, na.rm=TRUE)*100, 31.5,
  "Median household income*", 42500, 60293,
  "Republican (incl. leaner)", sum(study2$lucid_rep, na.rm=TRUE)/length(study2$lucid_rep)*100, 39
)
```

This study was pre-registered with the Open Science Framework ([link https://osf.io/sqx9f/](https://osf.io/sqx9f/)). Data and the following replication materials are available on Dataverse ([link https://doi.org/10.7910/DVN/UCUPMF](https://doi.org/10.7910/DVN/UCUPMF)): pre-registration, survey description document including survey flow, CONSORT chart, and codebook with full question wording.

*Participant recruitment, sample size, and exclusions*

The survey was fielded in August 2021 through the survey company Lucid. We intended to recruit 2,400 participants. As specified in the pre-registration, we selected this sample size through a combination of practical constraints and looking to have roughly 200 respondents per comparison condition. Since we planned for exploratory analyses to include analyzing each treatment condition separately by respondents who personally do or do not know someone in a low-income occupation, the “cell” of 400 was going to be divided into two additional groups. The pre-test indicated that a convenience sample may split roughly in two based on responses to this network question, leaving about 200 respondents per “condition” of interest, and 2400 respondents as the overall target sample size.

A convenience sample of `r full_n` respondents who were residents of the United States were routed to the consent page through Lucid (recruitment was based on number of completions). Some respondents did not proceed beyond the consent page. Additionally, respondents who failed an early attention check were not allowed to complete the survey and the 2\% fastest responders in the remaining sample were dropped; these exclusions were pre-registered. After these steps the final sample consisted of `r nrow(study2)` respondents. More detail is available in the survey description document on the project's OSF page, which includes a CONSORT flow chart. This document also fully describes all treatment conditions and all pre- as well as post-treatment measures.

*Relationship of hypothesis to experimental design*

The survey was designed to evaluate the main hypothesis that information about the salaries of individuals in low-income occupations can affect support for redistributive policy. Four treatment conditions in this experiment presented such information; the control condition presented no information. As a result, the main analysis of interest was the difference in support for redistribution between the four treatment conditions (analyzed jointly) and the control condition. 

The four treatment conditions additionally varied in whether they also asked respondents to estimate salaries before being presented with the information, and whether the low-income salaries were presented alongside high-income salaries for contrast. The differences between these conditions were analyzed as per pre-registered secondary research questions.

*Human subjects research*

This research was approved by the IRBs at Syracuse University (application number 21-135) and the University of Memphis (application number PRO-FY2021-469). The research was classified as "minimal harm", meaning that participation was considered to have no more risk of harm than those present in everyday life. No deception was used. Participants were asked for their consent before starting the survey (full consent form available in the survey description document on OSF). Participants were compensated by the survey company Lucid according to their prevailing market rates, either in cash or in alternative formats such as rewards program points. The researchers do not have access to information regarding exact compensation levels, but Lucid estimated that a typical compensation would be worth about USD 0.50. No IP addresses were collected during the experiment. In addition, exact time stamps, respondents' Lucid panel id numbers, and zip codes have been removed from the public version of the dataset.

*Sample characteristics*

The median age of respondents in the final sample was `r median(study2$age_1, na.rm=T)`. `r sum(study2$gender==1)/nrow(study2)*100`\% were male, `r sum(study2$ethnicity==1)/nrow(study2)*100`\% were white, `r sum(study2$ethnicity==2)/nrow(study2)*100`\% were Black, and `r sum(study2$hispanic!=1)/nrow(study2)*100`\% were Hispanic. `r sum(study2$education>4, na.rm=T)/nrow(study2)*100`\% had a college degree. The median household income was about \$40,000 - \$45,000 and the mean income was about \$50,000. `r sum(study2$democrat==1)/nrow(study2)*100`\% identify as Democrats (including leaners) and `r sum(study2$republican==1)/nrow(study2)*100`\% identify as Republicans (including leaners). Table A3 summarizes the demographic characteristics of the sample. 

```{r demographics table study 2, results = 'asis', echo=F}
#render the Study 2 demographics table: base kable, then footnote, then striped styling
study2_demo_kable <- knitr::kable(
  demographics_table_2,
  digits = 0,
  caption = "Study 2 Demographics"
)
study2_demo_kable <- add_footnote(
  study2_demo_kable,
  "United States estimates derived from ACS 5 year estimates for 2018 and the 2016 ANES. Household income is a median (estimated from categorical variables) for the sample but a mean for the national population."
)
kable_styling(study2_demo_kable, latex_options = "striped")
```

`r sum(study2$any_contact==1, na.rm=T)/nrow(study2)*100`\% of respondents report knowing someone who works in at least one of the low-wage occupations. This includes `r sum(study2$any_friend==1, na.rm=T)/nrow(study2)*100`\% of respondents who report that a close friend or family member works in at least one of the five low-wage occupations. These shares did not meaningfully vary across experimental treatments.

Over-estimation of the salaries of low-income workers was common in the two experimental conditions in which respondents were asked to give such estimates. In the high-contrast condition, respondents guessed the incomes of three low-income and two high-income occupations. In this condition, `r sum(study2_cond1_hi$lowinc_overest_dummy==1, na.rm=T)/(sum(study2_cond1_hi$lowinc_overest_dummy==1, na.rm=T)+sum(study2_cond1_hi$lowinc_overest_dummy==0, na.rm=T))*100`\% of respondents over-estimated the salary of at least one low-income occupation. In the low-contrast condition, respondents guessed the salaries of five low-income occupations; `r sum(study2_cond2_lo$lowinc_overest_dummy==1, na.rm=T)/(sum(study2_cond2_lo$lowinc_overest_dummy==1, na.rm=T)+sum(study2_cond2_lo$lowinc_overest_dummy==0, na.rm=T))*100`\% of them over-estimated the salary of at least one occupation. 

`r sum(study2$health_overest_dummy==1, na.rm=T)/(sum(study2$health_overest_dummy==1, na.rm=T)+sum(study2$health_overest_dummy==0, na.rm=T))*100`\% of salary estimates for health care aides were higher than the actual salaries (median salary estimate \$`r median(study2$health_est, na.rm=T)`). The share of over-estimates was `r sum(study2$childcare_overest_dummy==1, na.rm=T)/(sum(study2$childcare_overest_dummy==1, na.rm=T)+sum(study2$childcare_overest_dummy==0, na.rm=T))*100`\% for childcare workers (median estimate \$`r median(study2$childcare_est, na.rm=T)`), `r sum(study2$retail_overest_dummy==1, na.rm=T)/(sum(study2$retail_overest_dummy==1, na.rm=T)+sum(study2$retail_overest_dummy==0, na.rm=T))*100`\% for retail sales clerks (median estimate \$`r median(study2$retail_est, na.rm=T)`), `r sum(study2$fastfood_overest_dummy==1, na.rm=T)/(sum(study2$fastfood_overest_dummy==1, na.rm=T)+sum(study2$fastfood_overest_dummy==0, na.rm=T))*100`\% for fast-food workers (median estimate \$`r median(study2$fastfood_est, na.rm=T)`), and `r sum(study2$waiter_overest_dummy==1, na.rm=T)/(sum(study2$waiter_overest_dummy==1, na.rm=T)+sum(study2$waiter_overest_dummy==0, na.rm=T))*100`\% for waiters (median estimate \$`r median(study2$waiter_est, na.rm=T)`).

# Appendix A3: Tables and Additional Figures of Main Results from Main Study (Study 2)

This section of the appendix includes the main results of Study 2 in table format. Table A4 shows the results of the main hypothesis test with and without covariates, asking whether information about the incomes of low-income workers affected support for redistribution. Table A5 shows the results of research questions 1 (does the inclusion of a contrast with high-income occupations matter) and 2 (does being asked to estimate salaries before seeing the information matter). Table A6 shows the results of research question 3 (do results vary by whether the respondent knows at least one person in any one of the low-income occupations they were asked about). Figures \ref{fig:H1_fig} - \ref{fig:RQ3_fig} present visualizations of the raw differences in means between treatment groups.

```{r figure H1 raw means grayscale, echo=F, include=T, warning=F, fig.cap="\\label{fig:H1_fig}Main results: impact of information on support for redistribution"}
#Bar chart of mean redistributive policy support for the pooled information conditions
#(pure_control = 0) versus the control condition (pure_control = 1), with error bars
#spanning 1.96 standard errors around each group mean
my_colors <- c("#bdbdbd", "#f7f7f7") #used https://colorbrewer2.org/# to find good grays
H1_plot <- ggplot(study2, aes(x=factor(pure_control), y=policy_support, fill=factor(pure_control))) +
  geom_bar(stat="summary", fun.data="mean_se",position='dodge')+
  geom_errorbar(stat="summary", fun.data="mean_se", fun.args=list(mult=1.96), position=position_dodge(0.9), width=0.2) +
  coord_cartesian(ylim = c(0,1),
                  xlim = c(1, 2)) +
  ylab("Redistributive policy\nsupport") +
  xlab(NULL) +
  ggtitle("Effect of information \n on policy support") +
  theme(axis.text = element_text(size = 12),
        #axis.title.y = element_text(angle=0, vjust=0.5),
        axis.ticks.x = element_blank(),
        plot.title = element_text(hjust = 0.5)
        ) +
  scale_x_discrete(breaks=c("0","1"),
        labels=c("Information", "Control")) +
  #fill only repeats the x axis, so the legend is suppressed;
  #guide="none" is the supported spelling (guide=FALSE is deprecated in ggplot2)
  scale_fill_manual(values = my_colors,
                         breaks=c("0","1"),
                         guide="none") +
  theme(text = element_text(size = 12))
H1_plot
```


```{r hypothesis test table , echo=F, results="asis"}
options(digits = 3)
#pre-registered regression test of H1 (pooled information treatment plus covariates),
#fitted for the primary outcome and for the secondary, low-income-policy outcome
h1_reg_prereg <- lm(
  policy_support ~ info_treatment + democrat + independent + age_1 + white +
    hispanic_dummy + hhi + college + male,
  data = study2
)
h1_reg_prereg_secondary <- lm(
  lowinc_policy_support ~ info_treatment + democrat + independent + age_1 + white +
    hispanic_dummy + hhi + college + male,
  data = study2
)

stargazer(
  h1_reg_prereg, h1_reg_prereg_secondary,
  covariate.labels = c(
    "Info treatment", "Democrat", "Independent", "Age", "White", "Hispanic",
    "Household income", "College degree", "Male", "Constant"
  ),
  dep.var.labels = c("Redistributive policy support", "Low-income policy support"),
  title = "Experimental effects on support for redistribution",
  digits = 2,
  header = FALSE,
  star.cutoffs = c(0.05, 0.01, 0.001)
)
```

```{r research questions 1-2 table , echo=F, results="asis"}
options(digits = 3)
#regressions for research questions 1 and 2, each splitting the information treatment into
#two dummies and using the same covariate set as the pre-registered H1 model
rq1_reg_ctrls <- lm(
  policy_support ~ contrast_treatment + nocontrast_treatment + democrat + independent +
    age_1 + white + hispanic_dummy + hhi + college + male,
  data = study2
)
rq2_reg_ctrls <- lm(
  policy_support ~ estimates_treatment + noestimates_treatment + democrat + independent +
    age_1 + white + hispanic_dummy + hhi + college + male,
  data = study2
)

stargazer(
  rq1_reg_ctrls, rq2_reg_ctrls,
  covariate.labels = c(
    "High contrast info", "Low contrast info", "Estimates", "No estimates",
    "Democrat", "Independent", "Age", "White", "Hispanic",
    "Household income", "College degree", "Male"
  ),
  dep.var.labels = "Redistributive policy support",
  title = "Treatment effects:\nasking for estimates and showing high-income contrast",
  digits = 2,
  header = FALSE,
  star.cutoffs = c(0.05, 0.01, 0.001)
)
```

```{r figure primary outcome all conds grayscale, echo=F, include=T, warning=F, fig.cap="\\label{fig:RQ1_fig}Results by various information treatment types"}
# Mean policy_support by contrast_plot (x axis) and est_plot (fill), drawn
# as dodged grayscale bars with error bars at the mean +/- 1.96 standard
# errors.
my_colors <- c("#636363", "#bdbdbd", "#f7f7f7")
all_conds_plot <- ggplot(
  study2,
  aes(
    x = factor(contrast_plot),
    y = policy_support,
    fill = factor(est_plot)
  )
) +
  geom_bar(stat = "summary", fun.data = "mean_se", position = "dodge") +
  geom_errorbar(
    stat = "summary",
    fun.data = "mean_se",
    fun.args = list(mult = 1.96),
    position = position_dodge(0.9),
    width = 0.2
  ) +
  coord_cartesian(ylim = c(0, 1), xlim = c(1, 3)) +
  labs(
    x = NULL,
    y = "Redistributive policy\nsupport",
    title = "Effect of information types \n on policy support"
  ) +
  # x axis: levels 1/2/3 of contrast_plot
  scale_x_discrete(
    breaks = c("1", "2", "3"),
    labels = c("High contrast", "Low contrast", "Control")
  ) +
  # fill legend: levels 1/2/3 of est_plot, shown without a legend title
  scale_fill_manual(
    name = NULL,
    values = my_colors,
    breaks = c("1", "2", "3"),
    labels = c("Asked estimates", "Not asked estimates", "Control")
  ) +
  theme(
    text = element_text(size = 12),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5)
    #axis.title.y = element_text(angle=0, vjust=0.5)
  )
all_conds_plot
```

```{r research question 3 table , echo=F, results="asis"}
options(digits = 3)

# RQ3: does the information effect vary with any_contact? Same covariates as
# the H1 regression, plus any_contact and its interaction with the
# treatment.
rq3_reg_ctrls <- lm(
  policy_support ~ info_treatment + any_contact +
    info_treatment:any_contact +
    democrat + independent + age_1 + white + hispanic_dummy + hhi +
    college + male,
  data = study2
)

# The interaction label comes last because R orders the interaction term
# after all main effects in the fitted model.
stargazer(
  rq3_reg_ctrls,
  title = "Effect of information conditional on low-income personal contact",
  dep.var.labels = "Redistributive policy support",
  covariate.labels = c(
    "Information treatment",
    "Any low-income contacts",
    "Democrat",
    "Independent",
    "Age",
    "White",
    "Hispanic",
    "Household income",
    "College degree",
    "Male",
    "Info treatment * Any contact"
  ),
  star.cutoffs = c(0.05, 0.01, 0.001),
  digits = 2,
  header = FALSE
)
```

```{r figure primary outcome contacts only all conds grayscale, echo=F, include=T, warning=F, fig.cap="\\label{fig:RQ3_fig}Results for respondents with at least one low-income contact"}
# Same layout as the all-conditions figure, but drawn from study2_contacts:
# mean policy_support by contrast_plot (x axis) and est_plot (fill), with
# error bars at the mean +/- 1.96 standard errors.
my_colors <- c("#636363", "#bdbdbd", "#f7f7f7")
all_conds_contacts_plot <- ggplot(
  study2_contacts,
  aes(
    x = factor(contrast_plot),
    y = policy_support,
    fill = factor(est_plot)
  )
) +
  geom_bar(stat = "summary", fun.data = "mean_se", position = "dodge") +
  geom_errorbar(
    stat = "summary",
    fun.data = "mean_se",
    fun.args = list(mult = 1.96),
    position = position_dodge(0.9),
    width = 0.2
  ) +
  coord_cartesian(ylim = c(0, 1), xlim = c(1, 3)) +
  labs(
    x = NULL,
    y = "Redistributive policy\nsupport",
    title = "Effect of information types on policy support",
    subtitle = "Respondents with at least one contact in a low-wage occupation"
  ) +
  # x axis: levels 1/2/3 of contrast_plot
  scale_x_discrete(
    breaks = c("1", "2", "3"),
    labels = c("High contrast", "Low contrast", "Control")
  ) +
  # fill legend: levels 1/2/3 of est_plot, shown without a legend title
  scale_fill_manual(
    name = NULL,
    values = my_colors,
    breaks = c("1", "2", "3"),
    labels = c("Asked estimates", "Not asked estimates", "Control")
  ) +
  theme(
    text = element_text(size = 12),
    axis.text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5)
    #axis.title.y = element_text(angle=0, vjust=0.5)
  )
all_conds_contacts_plot
```


# Appendix A4: Heterogeneous Effects by Party in Main Study (Study 2)

Table A7 shows the main results with the addition of an exploratory analysis of heterogeneous effects by partisanship. The variable for partisanship is a dummy variable indicating that a respondent identifies as Republican, including respondents who lean Republican. The results are unchanged if a continuous indicator of partisanship is used instead.

```{r het effects table , echo=F, results="asis", warning=F}
options(digits = 3)

# Exploratory heterogeneous-effects model: the information treatment
# interacted with the republican dummy, controlling for any_contact and the
# demographic covariates.
het_eff_reg_ctrls <- lm(
  policy_support ~ info_treatment + republican +
    info_treatment:republican +
    any_contact + age_1 + white + hispanic_dummy + hhi + college + male,
  data = study2
)

# The interaction label comes last because R orders the interaction term
# after all main effects in the fitted model.
stargazer(
  het_eff_reg_ctrls,
  title = "Effect of information conditional on respondent partisanship",
  dep.var.labels = "Redistributive policy support",
  covariate.labels = c(
    "Information treatment",
    "Republican (dummy)",
    "Any low-income contacts",
    "Age",
    "White",
    "Hispanic",
    "Household income",
    "College degree",
    "Male",
    "Info treatment * Republican"
  ),
  star.cutoffs = c(0.05, 0.01, 0.001),
  digits = 2,
  header = FALSE
)
```